library(car)
library(mosaic)
library(DT)
library(tidyverse)
files <- dir()
rdat <- read.csv(grep(".csv", files, value=TRUE), header=TRUE)
as_tibble(rdat)
# A tibble: 500 × 11
y x1 x2 x3 x4 x5 x6 x7 x8 x9 x10
<dbl> <dbl> <dbl> <int> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0.0989 -7.14 -0.494 1 7.84 0 -7.03 -3.62 -4.81 1.58 -3.20
2 -1.99 7.28 -1.10 0 -0.984 0 6.93 7.11 1.06 7.50 -6.81
3 -0.0511 -7.59 -0.256 1 3.20 1 -2.42 -0.864 -5.31 -1.75 6.85
4 -0.714 6.73 -3.57 1 6.23 1 -1.41 0.669 7.10 5.65 -1.58
5 0.554 -2.13 -2.77 1 5.35 0 7.38 -5.41 0.0705 -7.32 2.65
6 2.00 3.17 0.158 0 3.75 1 0.497 3.09 -7.25 -1.20 -7.60
7 -0.275 1.11 1.38 1 2.35 0 -2.58 4.76 6.20 -7.64 -5.56
8 0.266 -3.46 1.33 1 5.49 1 2.09 4.38 -4.85 -7.49 -5.63
9 -0.199 -7.82 0.993 1 -5.45 0 -6.05 -4.59 -5.01 -6.43 -6.47
10 15.7 -1.26 -78.6 1 -2.18 0 6.36 -4.74 1.06 7.08 -0.0902
# ℹ 490 more rows
pairs(rdat, pch=16, cex=1, panel=panel.smooth, col=rgb(.8,.8,.8))
rdat$y <- 1/rdat$y
pairs(rdat, pch=16, cex=1, panel=panel.smooth, col=rgb(.8,.8,.8))
rdat$x2 <- 1/rdat$x2
pairs(rdat, pch=16, cex=1, panel=panel.smooth, col=rgb(.8,.8,.8))
rdat$x9 <- 1/rdat$x9
pairs(rdat, pch=16, cex=1, panel=panel.smooth, col=rgb(.8,.8,.8))
rdat <- read.csv(grep(".csv", files, value=TRUE), header=TRUE)
pairs(rdat, pch=16, cex=1, panel=panel.smooth, col=rgb(.8,.8,.8))
lm1 <- lm(y ~ x2*x5*I(x2^2), data=rdat)
summary(lm1)
##
## Call:
## lm(formula = y ~ x2 * x5 * I(x2^2), data = rdat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.529 -1.191 0.079 0.864 50.937
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.591e-01 2.449e-01 -3.916 0.000103 ***
## x2 3.730e-01 2.193e-02 17.005 < 2e-16 ***
## x5 2.177e+00 3.529e-01 6.169 1.43e-09 ***
## I(x2^2) 5.018e-03 2.799e-04 17.928 < 2e-16 ***
## x2:x5 -2.801e-01 4.046e-02 -6.923 1.38e-11 ***
## x2:I(x2^2) -2.456e-05 1.040e-06 -23.607 < 2e-16 ***
## x5:I(x2^2) -1.283e-02 8.119e-04 -15.808 < 2e-16 ***
## x2:x5:I(x2^2) 5.817e-06 5.389e-06 1.079 0.280913
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.88 on 492 degrees of freedom
## Multiple R-squared: 0.9858, Adjusted R-squared: 0.9856
## F-statistic: 4870 on 7 and 492 DF, p-value: < 2.2e-16
palette(c("skyblue","orange","green","purple","steelblue","red","green3","black"))
pairs(cbind(R=lm1$res, Fit=lm1$fit, rdat), pch=16, cex=1, panel=panel.smooth, col=as.factor(rdat$x5))
lm2 <- lm(y ~ x2 + I(x2^2) + I(x2^3) +
x5 + x5:x2 + x5:I(x2^2), data=rdat)
summary(lm2)
##
## Call:
## lm(formula = y ~ x2 + I(x2^2) + I(x2^3) + x5 + x5:x2 + x5:I(x2^2),
## data = rdat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.884 -1.175 0.080 0.899 50.713
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.656e-01 2.449e-01 -3.943 9.20e-05 ***
## x2 3.732e-01 2.194e-02 17.015 < 2e-16 ***
## I(x2^2) 5.072e-03 2.754e-04 18.414 < 2e-16 ***
## I(x2^3) -2.434e-05 1.021e-06 -23.843 < 2e-16 ***
## x5 2.143e+00 3.515e-01 6.096 2.20e-09 ***
## x2:x5 -2.736e-01 4.002e-02 -6.838 2.39e-11 ***
## I(x2^2):x5 -1.214e-02 4.899e-04 -24.772 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.881 on 493 degrees of freedom
## Multiple R-squared: 0.9857, Adjusted R-squared: 0.9856
## F-statistic: 5679 on 6 and 493 DF, p-value: < 2.2e-16
ggplot(rdat, aes(x=x2, y=y, color=interaction(x5))) +
geom_point() +
geom_line(aes(y=lm2$fit)) +
facet_wrap(~interaction(x5))
ggplot(rdat, aes(x=x2, y=y, color=interaction(x5,x3))) +
geom_point() +
geom_line(aes(y=lm2$fit)) +
facet_wrap(~interaction(x5))
lm3 <- lm(y ~ x2 + I(x2^2) + I(x2^3) +
x5 + x5:x2 + x5:I(x2^2) +
x3 + x3:x2 +
x3:x5 + x3:x5:x2, data=rdat)
summary(lm3)
##
## Call:
## lm(formula = y ~ x2 + I(x2^2) + I(x2^3) + x5 + x5:x2 + x5:I(x2^2) +
## x3 + x3:x2 + x3:x5 + x3:x5:x2, data = rdat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.648 -0.450 -0.263 0.162 20.193
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.677e+00 2.811e-01 -5.965 4.70e-09 ***
## x2 2.362e-02 2.783e-02 0.849 0.39635
## I(x2^2) 7.671e-03 2.697e-04 28.448 < 2e-16 ***
## I(x2^3) -9.185e-06 1.236e-06 -7.430 4.88e-13 ***
## x5 3.590e+00 3.906e-01 9.191 < 2e-16 ***
## x3 1.220e+00 3.842e-01 3.175 0.00159 **
## x2:x5 -9.518e-02 4.692e-02 -2.028 0.04306 *
## I(x2^2):x5 -1.575e-02 5.168e-04 -30.482 < 2e-16 ***
## x2:x3 5.714e-01 3.530e-02 16.189 < 2e-16 ***
## x5:x3 -2.616e+00 5.512e-01 -4.746 2.73e-06 ***
## x2:x5:x3 -2.889e-01 6.336e-02 -4.560 6.49e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.056 on 489 degrees of freedom
## Multiple R-squared: 0.9912, Adjusted R-squared: 0.991
## F-statistic: 5526 on 10 and 489 DF, p-value: < 2.2e-16
ggplot(rdat, aes(x=x2, y=y, color=interaction(x5,x3))) +
geom_point() +
geom_line(aes(y=lm3$fit)) +
facet_wrap(~interaction(x5,x3))
lm4 <- lm(y ~ x2 + I(x2^2) + I(x2^3) +
x5 + x5:x2 + x5:I(x2^2) +
x3 + x3:x2 + x3:I(x2^2) +
x3:x5 + x3:x5:x2 + x3:x5:I(x2^2), data=rdat)
summary(lm4)
##
## Call:
## lm(formula = y ~ x2 + I(x2^2) + I(x2^3) + x5 + x5:x2 + x5:I(x2^2) +
## x3 + x3:x2 + x3:I(x2^2) + x3:x5 + x3:x5:x2 + x3:x5:I(x2^2),
## data = rdat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.751e-13 1.500e-16 1.950e-15 5.960e-15 1.070e-13
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.000e+00 3.340e-15 -5.988e+14 < 2e-16 ***
## x2 1.000e-03 3.315e-16 3.017e+12 < 2e-16 ***
## I(x2^2) 1.000e-02 3.435e-18 2.911e+15 < 2e-16 ***
## I(x2^3) 4.114e-20 1.548e-20 2.657e+00 0.00814 **
## x5 4.000e+00 4.640e-15 8.622e+14 < 2e-16 ***
## x3 2.000e+00 4.580e-15 4.367e+14 < 2e-16 ***
## x2:x5 -1.000e-03 5.742e-16 -1.741e+12 < 2e-16 ***
## I(x2^2):x5 -2.000e-02 6.614e-18 -3.024e+15 < 2e-16 ***
## x2:x3 -2.010e-01 6.244e-16 -3.219e+14 < 2e-16 ***
## I(x2^2):x3 -1.000e-02 5.725e-18 -1.747e+15 < 2e-16 ***
## x5:x3 -4.000e+00 6.616e-15 -6.046e+14 < 2e-16 ***
## x2:x5:x3 4.010e-01 8.944e-16 4.484e+14 < 2e-16 ***
## I(x2^2):x5:x3 2.000e-02 1.511e-17 1.324e+15 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.626e-14 on 487 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 3.3e+31 on 12 and 487 DF, p-value: < 2.2e-16
ggplot(rdat, aes(x=x2, y=y, color=interaction(x5,x3))) +
geom_point() +
geom_line(aes(y=lm4$fit)) +
facet_wrap(~interaction(x5,x3))
final.lm <- lm(y ~ x2 + I(x2^2) + I(x2^3) +
x5 + x5:x2 + x5:I(x2^2) +
x3 + x3:x2 + x3:I(x2^2) +
x3:x5 + x3:x5:x2 + x3:x5:I(x2^2), data=rdat)
summary(final.lm)
Call:
lm(formula = y ~ x2 + I(x2^2) + I(x2^3) + x5 + x5:x2 + x5:I(x2^2) +
x3 + x3:x2 + x3:I(x2^2) + x3:x5 + x3:x5:x2 + x3:x5:I(x2^2),
data = rdat)
Residuals:
Min 1Q Median 3Q Max
-6.751e-13 1.500e-16 1.950e-15 5.960e-15 1.070e-13
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.000e+00 3.340e-15 -5.988e+14 < 2e-16 ***
x2 1.000e-03 3.315e-16 3.017e+12 < 2e-16 ***
I(x2^2) 1.000e-02 3.435e-18 2.911e+15 < 2e-16 ***
I(x2^3) 4.114e-20 1.548e-20 2.657e+00 0.00814 **
x5 4.000e+00 4.640e-15 8.622e+14 < 2e-16 ***
x3 2.000e+00 4.580e-15 4.367e+14 < 2e-16 ***
x2:x5 -1.000e-03 5.742e-16 -1.741e+12 < 2e-16 ***
I(x2^2):x5 -2.000e-02 6.614e-18 -3.024e+15 < 2e-16 ***
x2:x3 -2.010e-01 6.244e-16 -3.219e+14 < 2e-16 ***
I(x2^2):x3 -1.000e-02 5.725e-18 -1.747e+15 < 2e-16 ***
x5:x3 -4.000e+00 6.616e-15 -6.046e+14 < 2e-16 ***
x2:x5:x3 4.010e-01 8.944e-16 4.484e+14 < 2e-16 ***
I(x2^2):x5:x3 2.000e-02 1.511e-17 1.324e+15 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 3.626e-14 on 487 degrees of freedom
Multiple R-squared: 1, Adjusted R-squared: 1
F-statistic: 3.3e+31 on 12 and 487 DF, p-value: < 2.2e-16
\[ Y_i = \beta_0 + \beta_1 X_{2i} + \beta_2 X_{2i}^2 + \beta_3 X_{2i}^3 + \\ \beta_4 X_{5i} + \beta_5 X_{5i} X_{2i} + \beta_6 X_{5i} X_{2i}^2 + \\ \beta_7 X_{3i} + \beta_8 X_{3i} X_{2i} + \beta_9 X_{3i} X_{2i}^2 + \\ \beta_{10} X_{3i} X_{5i} + \beta_{11} X_{3i} X_{5i} X_{2i} + \beta_{12} X_{3i} X_{5i} X_{2i}^2 + \epsilon_i \]
plot(y ~ x2, data=rdat, col=interaction(x5,x3))
points(final.lm$fit ~ x2, data=rdat, col=interaction(x5,x3), pch=16, cex=0.5)
b <- coef(final.lm)
drawit <- function(x5=0, x3=0, i=1){
curve(b[1] + b[2]*x2 + b[3]*x2^2 + b[4]*x2^3 + b[5]*x5 + b[6]*x3 + b[7]*x2*x5 + b[8]*x2^2*x5 + b[9]*x2*x3 + b[10]*x2^2*x3 + b[11]*x5*x3 + b[12]*x2*x5*x3 + b[13]*x2^2*x5*x3, add=TRUE, xname="x2", col=palette()[i])
}
drawit(0,0,1)
drawit(1,0,2)
drawit(0,1,3)
drawit(1,1,4)
b
## (Intercept) x2 I(x2^2) I(x2^3) x5
## -2.000000e+00 1.000000e-03 1.000000e-02 4.114207e-20 4.000000e+00
## x3 x2:x5 I(x2^2):x5 x2:x3 I(x2^2):x3
## 2.000000e+00 -1.000000e-03 -2.000000e-02 -2.010000e-01 -1.000000e-02
## x5:x3 x2:x5:x3 I(x2^2):x5:x3
## -4.000000e+00 4.010000e-01 2.000000e-02
with(rdat, levels(interaction(x5,x3)))
## [1] "0.0" "1.0" "0.1" "1.1"